Code
library(tidyverse)
library(lubridate)
library(zoo)
library(forecast)
library(tseries)
library(patchwork)
library(here)
library(plotly)

# Load the GBP series and plot it over time.
#
# The DEXUSUK column (the FRED series ID for the U.S. dollar / U.K. pound
# spot rate) is renamed to UK_USD. NA values are forward-filled with the last
# observed value; na.rm = FALSE keeps any leading NAs so the number of rows
# is unchanged.
uk <- read.csv("../data/fx rate/UK_US.csv") %>%
  mutate(observation_date = as.Date(observation_date)) %>%
  rename(UK_USD = DEXUSUK) %>%
  mutate(UK_USD = na.locf(UK_USD, na.rm = FALSE))

uk_plot <- ggplot(uk, aes(x = observation_date, y = UK_USD)) +
  geom_line(color = "blue") +
  labs(
    title = "GBP/USD Exchange Rate",
    x = "Date",
    # DEXUSUK is quoted as U.S. dollars per one pound, which is what the
    # "GBP/USD" title means; the previous label claimed the inverse quote.
    y = "USD per 1 GBP"
  ) +
  theme_minimal()
uk_plot
# Load the JPY series and plot it over time.
#
# The DEXJPUS column (the FRED series ID for the Japanese yen / U.S. dollar
# spot rate, quoted as yen per one dollar) is renamed to JP_USD and
# forward-filled (leading NAs are kept). USD_JP is its reciprocal, i.e. the
# price of one yen in U.S. dollars, and is the series that gets plotted.
jp <- read_csv("../data/fx rate/JP_US.csv") %>%
  mutate(observation_date = as.Date(observation_date)) %>%
  rename(
    JP_USD = DEXJPUS,
    Date = observation_date
  ) %>%
  mutate(
    JP_USD = na.locf(JP_USD, na.rm = FALSE),
    USD_JP = 1 / JP_USD
  )

jp_plot <- ggplot(jp, aes(x = Date, y = USD_JP)) +
  geom_line(color = "blue") +
  labs(
    title = "JPY/USD Exchange Rate ",
    x = "Date",
    # The plotted column is 1 / (yen per dollar), so the unit is dollars per
    # yen; the previous label described the un-inverted quote.
    y = "USD per 1 JPY"
  ) +
  theme_minimal()
jp_plot
# Crude oil: read the cleaned file, coerce Date, sort chronologically, then
# forward-fill every non-Date column (leading NAs are left in place).
crude_oil <- arrange(
  mutate(
    read_csv(here("data/oil", "oil_cleaned.csv")),
    Date = as.Date(Date)
  ),
  Date
)
crude_oil <- mutate(
  crude_oil,
  across(-Date, function(column) na.locf(column, na.rm = FALSE))
)

# Line chart of the WTI column over time (auto-printed at top level).
ggplot(data = crude_oil, mapping = aes(x = Date, y = WTI)) +
  geom_line(color = "darkgreen") +
  ggtitle("Crude Oil Prices") +
  xlab("Date") +
  ylab("Price") +
  theme_minimal()
# S&P 500: read the cleaned file, coerce Date, sort chronologically, then
# forward-fill every non-Date column (leading NAs are left in place).
sp500 <- arrange(
  mutate(
    read_csv(here("data/sp500", "sp500_cleaned.csv")),
    Date = as.Date(Date)
  ),
  Date
)
sp500 <- mutate(
  sp500,
  across(-Date, function(column) na.locf(column, na.rm = FALSE))
)

# Line chart of the Close column over time (auto-printed at top level).
ggplot(data = sp500, mapping = aes(x = Date, y = Close)) +
  geom_line(color = "blue") +
  ggtitle("S&P 500 Index") +
  xlab("Date") +
  ylab("Level") +
  theme_minimal()

# Yield-curve spreads (10Y-2Y) for Japan, the EU, the UK and the US.
#
# The merged yield file is read and cleaned once; previously the identical
# read/clean pipeline was repeated before every plot and each plot's print
# statement was fused into the following assignment, so nothing was shown.
spread <- read_csv("../data/yield/merge_yield.csv") %>%
  mutate(
    Date = as.Date(Date),
    Year = lubridate::year(Date)
  ) %>%
  arrange(Date) %>%
  # ends_with() ignores case by default, so "_spread" also selects columns
  # such as JP_Spread. Forward-fill first (keeping leading NAs) ...
  mutate(across(ends_with("_spread"), ~ na.locf(.x, na.rm = FALSE))) %>%
  # ... then backward-fill whatever is still missing at the start.
  # na.rm = FALSE keeps the column length intact even if a column is all NA.
  mutate(
    across(
      ends_with("_spread"),
      ~ na.locf(.x, na.rm = FALSE, fromLast = TRUE)
    )
  )

# Build an interactive line chart of one spread column.
#
# data:   data frame with a Date column and the spread column.
# column: name of the spread column to plot, as a string.
# label:  region label used in the trace name and the chart title.
# Returns the plotly object.
plot_yield_spread <- function(data, column, label) {
  plot_ly(
    data,
    x = ~Date,
    y = stats::as.formula(paste0("~", column)),
    type = "scatter",
    mode = "lines",
    name = paste(label, "(10Y-2Y)")
  ) %>%
    layout(
      title = paste(label, "Yield Curve Spread (10Y-2Y)"),
      xaxis = list(title = "Date"),
      yaxis = list(title = "Spread (%)")
    )
}

# NOTE(review): jp_plot and uk_plot reuse the names of the FX ggplot objects
# created earlier in this file and therefore replace them, as before.
jp_plot <- plot_yield_spread(spread, "JP_Spread", "Japan")
jp_plot

eu_plot <- plot_yield_spread(spread, "EU_Spread", "EU")
eu_plot

uk_plot <- plot_yield_spread(spread, "UK_Spread", "UK")
uk_plot

# The US chart used to be assigned to eu_plot, overwriting the EU chart.
us_plot <- plot_yield_spread(spread, "US_Spread", "US")
us_plot
All the currencies show the same pattern against the USD: a clear upward trend until 2008, a plummet afterwards, a recovery period until 2020, and then another severe decrease. In real economic terms, this reflects the 2008 financial crisis and the 2020 pandemic, so the FX rate time series against the USD show the impact of economic crises. This also suggests that FX rates have cyclical patterns. Seasonality, however, is hard to detect.
Crude oil shows relatively less movement than the other assets. However, the oil price also reflects the economic situation, such as the 2020 pandemic. One thing to note is that, on a monthly basis, the crude oil price movement shows some kind of pattern.
The S&P 500 graph shows an upward trend over time, with an especially sharp rise from 2023. This plot also reflects the economic situation: for example, it decreases sharply in the first quarter of 2025, when a tariff war caused investment to slump due to economic and policy instability.
Bitcoin is very similar to the S&P 500 but with much more extreme movements; after 2023 it increases very sharply. It is hard to find a seasonal or cyclical pattern in this data.
In the yield curve spread (10-year minus 2-year), three of the economies have similar patterns, but Japan has a distinct pattern compared to the others: Japan's spread has been increasing since 2020, whereas the other spreads shift up and down. Spreads are generally a signal of economic policy expectations: an increasing spread could mean lower inflation or a better economic situation, whereas a decreasing spread could mean tightening by the central bank, which weighs on investment and signals a weaker economy.
From the lag plots, all the variables lead to a similar conclusion: the relationship is strongly linear, which indicates strong autocorrelation and non-stationarity. Seasonality is also hard to observe.
Among the plots, the spread series are less linear and less autocorrelated than the others; their points are less tightly clustered around the diagonal line.
All the data show weak seasonality, which agrees with the conclusion from the lag plots.




The ACF plots show slow decay with high autocorrelation values, while the PACF falls sharply to within the significance bounds after lag 0-1, showing the same strong correlation as the earlier results.
For these data the ACF decays very rapidly; for crude oil and the S&P 500 it goes negative, which suggests an inverse relationship between observations k time steps apart. The PACF also falls significantly after lag 0-1, showing correlation.
The ACF of the spread series also decays very rapidly; however, it does not go negative except for the US spread, which might be because the US spread moves much more sharply in the time series plot than the other countries' spreads. In the PACF plots, more lags are significant for the spread series (up to lag 2-3 or 4) before the partial autocorrelation dies out. This result corresponds to the lag plots, since the spread series' lag plots have a less linear pattern than the other data.
Augmented Dickey-Fuller Test
data: jp_ts
Dickey-Fuller = -1.3225, Lag order = 18, p-value = 0.8653
alternative hypothesis: stationary
For all of the categories, the ADF test suggests the series is non-stationary, since the p-value is large (> 0.05) and we fail to reject the null hypothesis. So a log transformation or differencing is needed.










For all the data, the ACF and PACF plots of the first-differenced series show similar patterns and appear stationary. This is checked next with the ADF test.
Augmented Dickey-Fuller Test
data: diff(log(jp_ts))
Dickey-Fuller = -18.43, Lag order = 18, p-value = 0.01
alternative hypothesis: stationary
Augmented Dickey-Fuller Test
data: diff(us_spread_ts)
Dickey-Fuller = -13.978, Lag order = 14, p-value = 0.01
alternative hypothesis: stationary
For all the data, in contrast to the ADF test before differencing, the test now rejects the null hypothesis due to the small p-value, so we conclude that the differenced series are stationary.
# Moving-average overlays for every series.
#
# The original repeated the same block ten times, with the end of each plot
# call fused to the start of the next section. The shared logic now lives in
# two helpers; every *_df object and every label is kept as before.

# Build a data frame with a series, its time index and trailing moving
# averages.
#
# series:  time-series object accepted by time() and as.numeric().
# windows: window widths; one column named "MA<width>" is added per width.
# Returns a data.frame with columns Date, Value and the MA columns.
moving_average_frame <- function(series, windows = c(20, 50, 100)) {
  frame <- data.frame(Date = time(series), Value = as.numeric(series))
  for (width in windows) {
    # align = "right" averages the current and preceding observations;
    # fill = NA pads the rows that do not yet have a full window.
    frame[[paste0("MA", width)]] <-
      rollmean(frame$Value, width, fill = NA, align = "right")
  }
  frame
}

# Plot a series together with its MA20, MA50 and MA100 columns.
#
# frame:       data frame produced by moving_average_frame().
# series_name: legend entry for the raw series.
# title:       chart title.
# y_title:     y-axis title.
# Returns the plotly object.
plot_moving_averages <- function(frame, series_name, title, y_title) {
  plot_ly(frame, x = ~Date) %>%
    add_lines(y = ~Value, name = series_name, line = list(color = "black")) %>%
    add_lines(y = ~MA20, name = "MA20", line = list(color = "blue")) %>%
    add_lines(y = ~MA50, name = "MA50", line = list(color = "red")) %>%
    add_lines(y = ~MA100, name = "MA100", line = list(color = "green")) %>%
    layout(
      title = title,
      xaxis = list(title = "Date"),
      yaxis = list(title = y_title)
    )
}

# FX rates ----
jp_df <- moving_average_frame(jp_ts)
plot_moving_averages(
  jp_df, "JPY Rate", "JPY Currency Rate with Moving Averages", "JPY rate"
)

uk_df <- moving_average_frame(uk_ts)
plot_moving_averages(
  uk_df, "GBP Rate", "GBP Currency Rate with Moving Averages", "GBP rate"
)

eu_df <- moving_average_frame(eu_ts)
plot_moving_averages(
  eu_df, "EUR Rate", "EUR Currency Rate with Moving Averages", "EUR rate"
)

# Commodities, equities and crypto ----
oil_df <- moving_average_frame(crude_oil_ts)
plot_moving_averages(
  oil_df, "Oil Price", "Crude Oil Price with Moving Averages", "Oil Price"
)

sp500_df <- moving_average_frame(sp500_ts)
plot_moving_averages(
  sp500_df, "S&P500", "S&P500 Index with Moving Averages", "S&P500"
)

btc_df <- moving_average_frame(btc_ts)
plot_moving_averages(
  btc_df, "BTC Price", "Bitcoin Price with Moving Averages", "BTC Price"
)

# Yield-curve spreads ----
# NOTE(review): these axes say "Spread (bps)" while the yield-spread charts
# earlier in this file say "Spread (%)" — confirm the unit of the *_spread_ts
# series and align the labels.
jp_spread_df <- moving_average_frame(jp_spread_ts)
plot_moving_averages(
  jp_spread_df, "JP Spread", "Japan Spread with Moving Averages",
  "Spread (bps)"
)

eu_spread_df <- moving_average_frame(eu_spread_ts)
plot_moving_averages(
  eu_spread_df, "EU Spread", "Euro Spread with Moving Averages",
  "Spread (bps)"
)

uk_spread_df <- moving_average_frame(uk_spread_ts)
plot_moving_averages(
  uk_spread_df, "UK Spread", "UK Spread with Moving Averages",
  "Spread (bps)"
)

us_spread_df <- moving_average_frame(us_spread_ts)
plot_moving_averages(
  us_spread_df, "US Spread", "US Spread with Moving Averages",
  "Spread (bps)"
)

Three moving averages: 20, 50, 100
For all the plots, the 50-period moving average aligns well with the data, whereas the 100-period moving average appears to over-smooth the series, lagging behind its actual movements.